home *** CD-ROM | disk | FTP | other *** search
/ Enigma Amiga Life 109 / EnigmaAmiga109CD.iso / dalla rivista / host contacted / jikes.lha / jikes-1.11 / src / scanner.cpp < prev    next >
C/C++ Source or Header  |  2000-01-06  |  52KB  |  1,577 lines

  1. // $Id: scanner.cpp,v 1.11 2000/01/06 08:24:30 lord Exp $
  2. //
  3. // This software is subject to the terms of the IBM Jikes Compiler
  4. // License Agreement available at the following URL:
  5. // http://www.ibm.com/research/jikes.
  6. // Copyright (C) 1996, 1998, International Business Machines Corporation
  7. // and others.  All Rights Reserved.
  8. // You must accept the terms of that agreement to use this software.
  9. //
  10. #include "config.h"
  11. #include "scanner.h"
  12. #include "control.h"
  13. #include "error.h"
  14.  
  15. int (*Scanner::scan_keyword[13]) (wchar_t *p1) =
  16. {
  17.     ScanKeyword0,
  18.     ScanKeyword0,
  19.     ScanKeyword2,
  20.     ScanKeyword3,
  21.     ScanKeyword4,
  22.     ScanKeyword5,
  23.     ScanKeyword6,
  24.     ScanKeyword7,
  25.     ScanKeyword8,
  26.     ScanKeyword9,
  27.     ScanKeyword10,
  28.     ScanKeyword0,
  29.     ScanKeyword12
  30. };
  31.  
  32.  
  33. //
  34. // The constructor initializes all utility variables.
  35. //
  36. Scanner::Scanner(Control &control_) : control(control_)
  37. {
  38.     //
  39.     // If this assertion fails, the Token structure in stream.h must be redesigned !!!
  40.     //
  41.     assert(NUM_TERMINALS < 128);
  42.  
  43.     //
  44.     // -------------------------------------------------------------------------------
  45.     // We are pulling this code out because we are tired of defending it. We
  46.     // tought it was obvious that either $ should not have been used for compiler
  47.     // generated variables or that users should not be allowed to use in variable names...
  48.     // -------------------------------------------------------------------------------
  49.     //
  50.     // For version 1.1 or above a $ may not be used as part of an identifier name
  51.     // unless the user specifically requests that it be allowed.
  52.     //
  53.     //    if (! control.option.dollar)
  54.     //        Code::SetBadCode(U_DOLLAR);
  55.     //
  56.  
  57.     //
  58.     // CLASSIFY_TOKEN is a mapping from each character into a
  59.     // classification routine that is invoked when that character
  60.     // is the first character encountered in a token.
  61.     //
  62.     for (int c = 0; c < 128; c++)
  63.     {
  64.         if (Code::IsAlpha(c))
  65.              classify_token[c] = &Scanner::ClassifyId;
  66.         else if (Code::IsDigit(c))
  67.              classify_token[c] = &Scanner::ClassifyNumericLiteral;
  68.         else classify_token[c] = &Scanner::ClassifyBadToken;
  69.     }
  70.     classify_token[128] = &Scanner::ClassifyNonAsciiUnicode;
  71.  
  72.     classify_token[U_a] = &Scanner::ClassifyIdOrKeyword;
  73.     classify_token[U_b] = &Scanner::ClassifyIdOrKeyword;
  74.     classify_token[U_c] = &Scanner::ClassifyIdOrKeyword;
  75.     classify_token[U_d] = &Scanner::ClassifyIdOrKeyword;
  76.     classify_token[U_e] = &Scanner::ClassifyIdOrKeyword;
  77.     classify_token[U_f] = &Scanner::ClassifyIdOrKeyword;
  78.     classify_token[U_g] = &Scanner::ClassifyIdOrKeyword;
  79.     classify_token[U_i] = &Scanner::ClassifyIdOrKeyword;
  80.     classify_token[U_l] = &Scanner::ClassifyIdOrKeyword;
  81.     classify_token[U_n] = &Scanner::ClassifyIdOrKeyword;
  82.     classify_token[U_p] = &Scanner::ClassifyIdOrKeyword;
  83.     classify_token[U_r] = &Scanner::ClassifyIdOrKeyword;
  84.     classify_token[U_s] = &Scanner::ClassifyIdOrKeyword;
  85.     classify_token[U_t] = &Scanner::ClassifyIdOrKeyword;
  86.     classify_token[U_v] = &Scanner::ClassifyIdOrKeyword;
  87.     classify_token[U_w] = &Scanner::ClassifyIdOrKeyword;
  88.  
  89.     classify_token[U_SINGLE_QUOTE]       = &Scanner::ClassifyCharLiteral;
  90.     classify_token[U_DOUBLE_QUOTE]       = &Scanner::ClassifyStringLiteral;
  91.  
  92.     classify_token[U_PLUS]               = &Scanner::ClassifyPlus;
  93.     classify_token[U_MINUS]              = &Scanner::ClassifyMinus;
  94.     classify_token[U_EXCLAMATION]        = &Scanner::ClassifyNot;
  95.     classify_token[U_PERCENT]            = &Scanner::ClassifyMod;
  96.     classify_token[U_CARET]              = &Scanner::ClassifyXor;
  97.     classify_token[U_AMPERSAND]          = &Scanner::ClassifyAnd;
  98.     classify_token[U_STAR]               = &Scanner::ClassifyStar;
  99.     classify_token[U_BAR]                = &Scanner::ClassifyOr;
  100.     classify_token[U_TILDE]              = &Scanner::ClassifyComplement;
  101.     classify_token[U_SLASH]              = &Scanner::ClassifySlash;
  102.     classify_token[U_GREATER]            = &Scanner::ClassifyGreater;
  103.     classify_token[U_LESS]               = &Scanner::ClassifyLess;
  104.     classify_token[U_LEFT_PARENTHESIS]   = &Scanner::ClassifyLparen;
  105.     classify_token[U_RIGHT_PARENTHESIS]  = &Scanner::ClassifyRparen;
  106.     classify_token[U_LEFT_BRACE]         = &Scanner::ClassifyLbrace;
  107.     classify_token[U_RIGHT_BRACE]        = &Scanner::ClassifyRbrace;
  108.     classify_token[U_LEFT_BRACKET]       = &Scanner::ClassifyLbracket;
  109.     classify_token[U_RIGHT_BRACKET]      = &Scanner::ClassifyRbracket;
  110.     classify_token[U_SEMICOLON]          = &Scanner::ClassifySemicolon;
  111.     classify_token[U_QUESTION]           = &Scanner::ClassifyQuestion;
  112.     classify_token[U_COLON]              = &Scanner::ClassifyColon;
  113.     classify_token[U_COMMA]              = &Scanner::ClassifyComma;
  114.     classify_token[U_DOT]                = &Scanner::ClassifyPeriod;
  115.     classify_token[U_EQUAL]              = &Scanner::ClassifyEqual;
  116.  
  117.     return;
  118. }
  119.  
  120.  
  121. //
  122. // Associate a lexical stream with this file
  123. //
  124. void Scanner::Initialize(FileSymbol *file_symbol)
  125. {
  126.     lex = new LexStream(control, file_symbol);
  127.     lex -> Reset();
  128.  
  129.     current_token_index = lex -> GetNextToken(0); // Get 0th token !
  130.     current_token = &(lex -> token_stream[current_token_index]);
  131.     current_token -> SetKind(0);
  132.  
  133.     if (control.option.comments)
  134.     {
  135.         LexStream::Comment *current_comment = &(lex -> comment_stream.Next()); // add 0th comment !
  136.         current_comment -> string = NULL;
  137.         current_comment -> length = 0;
  138.         current_comment -> previous_token = -1; // No token precedes this comment
  139.         current_comment -> location = 0;
  140.     }
  141.  
  142.     lex -> line_location.Next() = 0; // mark starting location of line # 0
  143.  
  144.     return;
  145. }
  146.  
  147.  
  148. //
  149. // This is one of the main entry point for the Java lexical analyser.
  150. // Its input is the name of a regular text file. Its output is a stream
  151. // of tokens.
  152. //
  153. void Scanner::SetUp(FileSymbol *file_symbol)
  154. {
  155.     Initialize(file_symbol);
  156.     lex -> CompressSpace();
  157.     file_symbol -> lex_stream = lex;
  158.  
  159.     return;
  160. }
  161.  
  162.  
  163. //
  164. // This is one of the main entry point for the Java lexical analyser.
  165. // Its input is the name of a regular text file. Its output is a stream
  166. // of tokens.
  167. //
  168. void Scanner::Scan(FileSymbol *file_symbol)
  169. {
  170.     Initialize(file_symbol);
  171.  
  172.     lex -> ReadInput();
  173.  
  174.     cursor = lex -> InputBuffer();
  175.     if (cursor)
  176.     {
  177.         Scan();
  178.  
  179.         lex -> CompressSpace();
  180.  
  181.         //
  182.         //
  183.         //
  184.         if (control.option.dump_errors)
  185.         {
  186.             lex -> SortMessages();
  187.             for (int i = 0; i < lex -> bad_tokens.Length(); i++)
  188.                 lex -> PrintEmacsMessage(i);
  189.             Coutput.flush();
  190.         }
  191.         lex -> DestroyInput(); // get rid of input buffer
  192.     }
  193.     else
  194.     {
  195.         delete lex;
  196.         lex = NULL;
  197.     }
  198.  
  199.     file_symbol -> lex_stream = lex;
  200.  
  201.     return;
  202. }
  203.  
  204.  
  205. //
  206. // Scan the InputBuffer() and process all tokens and comments.
  207. //
  208. void Scanner::Scan()
  209. {
  210.     wchar_t *input_buffer_tail = &cursor[lex -> InputBufferLength()];
  211.  
  212.     //
  213.     // CURSOR is assumed to point to the next character to be scanned.
  214.     // Using CURSOR,we jump to the proper classification function
  215.     // which scans and classifies the token and returns the location of
  216.     // the character immediately following it.
  217.     //
  218.     do
  219.     {
  220.         SkipSpaces();
  221.  
  222.         //
  223.         // Allocate space for next token and set its location.
  224.         //
  225.         current_token_index = lex -> GetNextToken(cursor - lex -> InputBuffer());
  226.         current_token = &(lex -> token_stream[current_token_index]);
  227.  
  228.         (this ->* classify_token[*cursor < 128 ? *cursor : 128])();
  229.     } while (cursor < input_buffer_tail);
  230.  
  231.     //
  232.     // Add a a gate after the last line.
  233.     //
  234.     lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer();
  235.  
  236.     //
  237.     // If the brace_stack is not empty, then there are unmatched left
  238.     // braces in the input. Each unmatched left brace should point to
  239.     // the EOF token as a substitute for a matching right brace.
  240.     //
  241.     assert(current_token_index == lex -> token_stream.Length() - 1);
  242.  
  243.     for (LexStream::TokenIndex left_brace = brace_stack.Top(); left_brace; left_brace = brace_stack.Top())
  244.     {
  245.         lex -> token_stream[left_brace].SetRightBrace(current_token_index);
  246.         brace_stack.Pop();
  247.     }
  248.  
  249.     return;
  250. }
  251.  
  252.  
  253. //
  254. // CURSOR points to the starting position of a comment.  Scan the
  255. // the comment and return the location of the character immediately
  256. // following it. CURSOR is advanced accordingly.
  257. //
  258. void Scanner::ScanStarComment()
  259. {
  260.     LexStream::Comment *current_comment = (control.option.comments ? &(lex -> comment_stream.Next()) : new LexStream::Comment());
  261.     current_comment -> string = NULL;
  262.     current_comment -> previous_token = current_token_index; // the token that precedes this comment
  263.     current_comment -> location = cursor - lex -> InputBuffer();
  264.  
  265.     cursor += 2;
  266.  
  267.     //
  268.     // If this comment starts with the prefix "/**" then, it may be a document
  269.     // comment. Check whether or not it contains the deprecated tag and if so,
  270.     // mark the token preceeding it.
  271.     //
  272.     if (*cursor == U_STAR)
  273.     {
  274.         for (;;)
  275.         {
  276.             while (*cursor != U_STAR && (! Code::IsNewline(*cursor)) && *cursor != U_CTL_Z)
  277.             {
  278.                 if (cursor[0] == U_AT &&
  279.                     cursor[1] == U_d &&
  280.                     cursor[2] == U_e &&
  281.                     cursor[3] == U_p &&
  282.                     cursor[4] == U_r &&
  283.                     cursor[5] == U_e &&
  284.                     cursor[6] == U_c &&
  285.                     cursor[7] == U_a &&
  286.                     cursor[8] == U_t &&
  287.                     cursor[9] == U_e &&
  288.                     cursor[10] == U_d)
  289.                 {
  290.                     current_token -> SetDeprecated(); // the token that precedes this comment
  291.                 }
  292.                 cursor++;
  293.             }
  294.  
  295.             if (*cursor == U_STAR) // Potential comment closer
  296.             {
  297.                 while (*++cursor == U_STAR)
  298.                     ;
  299.                 if (*cursor == U_SLASH)
  300.                 {
  301.                     cursor++;
  302.                     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  303.                     if (! control.option.comments)
  304.                         delete current_comment;
  305.                     return;
  306.                 }
  307.             }
  308.             else if (Code::IsNewline(*cursor)) // Record new line
  309.             {
  310.                 cursor++;
  311.                 lex -> line_location.Next() = cursor - lex -> InputBuffer();
  312.             }
  313.             else break;
  314.         }
  315.     }
  316.     else
  317.     {
  318.         for (;;)
  319.         {
  320.             while (*cursor != U_STAR && (! Code::IsNewline(*cursor)) && *cursor != U_CTL_Z)
  321.                 cursor++;
  322.  
  323.             if (*cursor == U_STAR) // Potential comment closer
  324.             {
  325.                 while (*++cursor == U_STAR)
  326.                     ;
  327.                 if (*cursor == U_SLASH)
  328.                 {
  329.                     cursor++;
  330.                     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  331.                     if (! control.option.comments)
  332.                         delete current_comment;
  333.                     return;
  334.                 }
  335.             }
  336.             else if (Code::IsNewline(*cursor)) // Record new line
  337.             {
  338.                 cursor++;
  339.                 lex -> line_location.Next() = cursor - lex -> InputBuffer();
  340.             }
  341.             else break;
  342.         }
  343.     }
  344.  
  345.     lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_COMMENT,
  346.                                         current_comment -> location,
  347.                                         (unsigned) (cursor - lex -> InputBuffer()) - 1);
  348.  
  349.     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  350.  
  351.     if (! control.option.comments)
  352.         delete current_comment;
  353.  
  354.     return;
  355. }
  356.  
  357.  
  358. //
  359. //
  360. //
  361. void Scanner::ScanSlashComment()
  362. {
  363.     if (control.option.comments)
  364.     {
  365.         LexStream::Comment *current_comment = &(lex -> comment_stream.Next());
  366.         current_comment -> string = NULL;
  367.         current_comment -> previous_token = current_token_index;  // the token that precedes this comment
  368.         current_comment -> location = cursor - lex -> InputBuffer();
  369.         for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
  370.             ;
  371.         current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  372.     }
  373.     else
  374.     {
  375.         for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
  376.             ;
  377.     }
  378.  
  379.     return;
  380. }
  381.  
  382.  
  383. //
  384. // This procedure is invoked to skip useless spaces in the input.
  385. // It assumes upon entry that CURSOR points to the next character to
  386. // be scanned.  Before returning it sets CURSOR to the location of the
  387. // first non-space character following its initial position.
  388. //
  389. inline void Scanner::SkipSpaces()
  390. {
  391.     do
  392.     {
  393.         while (Code::IsSpaceButNotNewline(*cursor))
  394.             cursor++;
  395.         while (Code::IsNewline(*cursor))        // starting a new line?
  396.         {
  397.             cursor++;
  398.             lex -> line_location.Next() = cursor - lex -> InputBuffer();
  399.             while (Code::IsSpaceButNotNewline(*cursor))
  400.                 cursor++;
  401.         }
  402.  
  403.         while (*cursor == U_SLASH)
  404.         {
  405.             if (cursor[1] == U_STAR)
  406.                  ScanStarComment();
  407.             else if (cursor[1] == U_SLASH)
  408.                  ScanSlashComment();
  409.             else break;
  410.         }
  411.     } while (Code::IsSpace(*cursor));
  412.  
  413.     return;
  414. }
  415.  
  416.  
  417. /**********************************************************************/
  418. /**********************************************************************/
  419. /**                                                                  **/
  420. /**                           scan_keyword(i):                       **/
  421. /**                                                                  **/
  422. /**********************************************************************/
  423. /**********************************************************************/
  424. /**                                                                  **/
  425. /** Scan an identifier of length I and determine if it is a keyword. **/
  426. /**                                                                  **/
  427. /**********************************************************************/
  428. /**********************************************************************/
  429. int Scanner::ScanKeyword0(wchar_t *p1)
  430. {
  431.     return TK_Identifier;
  432. }
  433.  
  434. int Scanner::ScanKeyword2(wchar_t *p1)
  435. {
  436.     if (p1[0] == U_d && p1[1] == U_o)
  437.         return TK_do;
  438.     else if (p1[0] == U_i && p1[1] == U_f)
  439.         return TK_if;
  440.  
  441.     return TK_Identifier;
  442. }
  443.  
  444. int Scanner::ScanKeyword3(wchar_t *p1)
  445. {
  446.     switch(*p1)
  447.     {
  448.         case U_f:
  449.             if (p1[1] == U_o && p1[2] == U_r)
  450.                 return TK_for;
  451.             break;
  452.         case U_i:
  453.             if (p1[1] == U_n && p1[2] == U_t)
  454.                 return TK_int;
  455.             break;
  456.         case U_n:
  457.             if (p1[1] == U_e && p1[2] == U_w)
  458.                 return TK_new;
  459.             break;
  460.         case U_t:
  461.             if (p1[1] == U_r && p1[2] == U_y)
  462.                 return TK_try;
  463.             break;
  464.     }
  465.  
  466.     return TK_Identifier;
  467. }
  468.  
  469. int Scanner::ScanKeyword4(wchar_t *p1)
  470. {
  471.     switch (*p1)
  472.     {
  473.         case U_b:
  474.             if (p1[1] == U_y && p1[2] == U_t && p1[3] == U_e)
  475.                 return TK_byte;
  476.             break;
  477.         case U_c:
  478.             if (p1[1] == U_a && p1[2] == U_s && p1[3] == U_e)
  479.                 return TK_case;
  480.             else if (p1[1] == U_h && p1[2] == U_a && p1[3] == U_r)
  481.                 return TK_char;
  482.             break;
  483.         case U_e:
  484.             if (p1[1] == U_l && p1[2] == U_s && p1[3] == U_e)
  485.                 return TK_else;
  486.             break;
  487.         case U_g:
  488.             if (p1[1] == U_o && p1[2] == U_t && p1[3] == U_o)
  489.                 return TK_goto;
  490.             break;
  491.         case U_l:
  492.             if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_g)
  493.                 return TK_long;
  494.             break;
  495.         case U_n:
  496.             if (p1[1] == U_u && p1[2] == U_l && p1[3] == U_l)
  497.                 return TK_null;
  498.             break;
  499.         case U_t:
  500.             if (p1[1] == U_h && p1[2] == U_i && p1[3] == U_s)
  501.                 return TK_this;
  502.             else if (p1[1] == U_r && p1[2] == U_u && p1[3] == U_e)
  503.                 return TK_true;
  504.             break;
  505.         case U_v:
  506.             if (p1[1] == U_o && p1[2] == U_i && p1[3] == U_d)
  507.                 return TK_void;
  508.             break;
  509.     }
  510.  
  511.     return TK_Identifier;
  512. }
  513.  
  514. int Scanner::ScanKeyword5(wchar_t *p1)
  515. {
  516.     switch (*p1)
  517.     {
  518.         case U_b:
  519.             if (p1[1] == U_r && p1[2] == U_e &&
  520.                 p1[3] == U_a && p1[4] == U_k)
  521.                 return TK_break;
  522.             break;
  523.         case U_c:
  524.             if (p1[1] == U_a && p1[2] == U_t &&
  525.                 p1[3] == U_c && p1[4] == U_h)
  526.                 return TK_catch;
  527.             else if (p1[1] == U_l && p1[2] == U_a &&
  528.                      p1[3] == U_s && p1[4] == U_s)
  529.                 return TK_class;
  530.             else if (p1[1] == U_o && p1[2] == U_n &&
  531.                      p1[3] == U_s && p1[4] == U_t)
  532.                 return TK_const;
  533.             break;
  534.         case U_f:
  535.             if (p1[1] == U_a && p1[2] == U_l &&
  536.                 p1[3] == U_s && p1[4] == U_e)
  537.                 return TK_false;
  538.             else if (p1[1] == U_i && p1[2] == U_n &&
  539.                      p1[3] == U_a && p1[4] == U_l)
  540.                 return TK_final;
  541.             else if (p1[1] == U_l && p1[2] == U_o &&
  542.                      p1[3] == U_a && p1[4] == U_t)
  543.                 return TK_float;
  544.             break;
  545.         case U_s:
  546.             if (p1[1] == U_h && p1[2] == U_o &&
  547.                 p1[3] == U_r && p1[4] == U_t)
  548.                 return TK_short;
  549.             else if (p1[1] == U_u && p1[2] == U_p &&
  550.                      p1[3] == U_e && p1[4] == U_r)
  551.                 return TK_super;
  552.             break;
  553.         case U_t:
  554.             if (p1[1] == U_h && p1[2] == U_r &&
  555.                 p1[3] == U_o && p1[4] == U_w)
  556.                 return TK_throw;
  557.             break;
  558.         case U_w:
  559.             if (p1[1] == U_h && p1[2] == U_i &&
  560.                 p1[3] == U_l && p1[4] == U_e)
  561.                 return TK_while;
  562.             break;
  563.     }
  564.  
  565.     return TK_Identifier;
  566. }
  567.  
  568. int Scanner::ScanKeyword6(wchar_t *p1)
  569. {
  570.     switch (*p1)
  571.     {
  572.         case U_d:
  573.             if (p1[1] == U_o && p1[2] == U_u &&
  574.                      p1[3] == U_b && p1[4] == U_l && p1[5] == U_e)
  575.                 return TK_double;
  576.             break;
  577.         case U_i:
  578.             if (p1[1] == U_m && p1[2] == U_p &&
  579.                 p1[3] == U_o && p1[4] == U_r && p1[5] == U_t)
  580.                 return TK_import;
  581.             break;
  582.         case U_n:
  583.             if (p1[1] == U_a && p1[2] == U_t &&
  584.                 p1[3] == U_i && p1[4] == U_v && p1[5] == U_e)
  585.                 return TK_native;
  586.             break;
  587.         case U_p:
  588.             if (p1[1] == U_u && p1[2] == U_b &&
  589.                 p1[3] == U_l && p1[4] == U_i && p1[5] == U_c)
  590.                 return TK_public;
  591.             break;
  592.         case U_r:
  593.             if (p1[1] == U_e && p1[2] == U_t &&
  594.                 p1[3] == U_u && p1[4] == U_r && p1[5] == U_n)
  595.                 return TK_return;
  596.             break;
  597.         case U_s:
  598.             if (p1[1] == U_t && p1[2] == U_a &&
  599.                 p1[3] == U_t && p1[4] == U_i && p1[5] == U_c)
  600.                     return TK_static;
  601.             else if (p1[1] == U_w && p1[2] == U_i &&
  602.                      p1[3] == U_t && p1[4] == U_c && p1[5] == U_h)
  603.                 return TK_switch;
  604.             break;
  605.         case U_t:
  606.             if (p1[1] == U_h && p1[2] == U_r &&
  607.                 p1[3] == U_o && p1[4] == U_w && p1[5] == U_s)
  608.                 return TK_throws;
  609.             break;
  610.     }
  611.  
  612.     return TK_Identifier;
  613. }
  614.  
  615. int Scanner::ScanKeyword7(wchar_t *p1)
  616. {
  617.     switch(*p1)
  618.     {
  619.         case U_b:
  620.             if (p1[1] == U_o && p1[2] == U_o && p1[3] == U_l &&
  621.                 p1[4] == U_e && p1[5] == U_a && p1[6] == U_n)
  622.                 return TK_boolean;
  623.         case U_d:
  624.             if (p1[1] == U_e && p1[2] == U_f && p1[3] == U_a &&
  625.                 p1[4] == U_u && p1[5] == U_l && p1[6] == U_t)
  626.                 return TK_default;
  627.             break;
  628.         case U_e:
  629.             if (p1[1] == U_x && p1[2] == U_t && p1[3] == U_e &&
  630.                 p1[4] == U_n && p1[5] == U_d && p1[6] == U_s)
  631.                 return TK_extends;
  632.             break;
  633.         case U_f:
  634.             if (p1[1] == U_i && p1[2] == U_n && p1[3] == U_a &&
  635.                 p1[4] == U_l && p1[5] == U_l && p1[6] == U_y)
  636.                 return TK_finally;
  637.             break;
  638.         case U_p:
  639.             if (p1[1] == U_a && p1[2] == U_c && p1[3] == U_k &&
  640.                 p1[4] == U_a && p1[5] == U_g && p1[6] == U_e)
  641.                 return TK_package;
  642.             else if (p1[1] == U_r && p1[2] == U_i && p1[3] == U_v &&
  643.                      p1[4] == U_a && p1[5] == U_t && p1[6] == U_e)
  644.                 return TK_private;
  645.             break;
  646.     }
  647.  
  648.     return TK_Identifier;
  649. }
  650.  
  651. int Scanner::ScanKeyword8(wchar_t *p1)
  652. {
  653.     switch(*p1)
  654.     {
  655.         case U_a:
  656.             if (p1[1] == U_b && p1[2] == U_s &&
  657.                 p1[3] == U_t && p1[4] == U_r &&
  658.                 p1[5] == U_a && p1[6] == U_c && p1[7] == U_t)
  659.                  return TK_abstract;
  660.             break;
  661.         case U_c:
  662.             if (p1[1] == U_o && p1[2] == U_n &&
  663.                 p1[3] == U_t && p1[4] == U_i &&
  664.                 p1[5] == U_n && p1[6] == U_u && p1[7] == U_e)
  665.                  return TK_continue;
  666.             break;
  667.         case U_s:
  668.             if (p1[1] == U_t && p1[2] == U_r &&
  669.                 p1[3] == U_i && p1[4] == U_c &&
  670.                 p1[5] == U_t && p1[6] == U_f && p1[7] == U_p)
  671.                  return TK_strictfp;
  672.             break;
  673.         case U_v:
  674.             if (p1[1] == U_o && p1[2] == U_l &&
  675.                 p1[3] == U_a && p1[4] == U_t &&
  676.                 p1[5] == U_i && p1[6] == U_l && p1[7] == U_e)
  677.                  return TK_volatile;
  678.             break;
  679.     }
  680.  
  681.     return TK_Identifier;
  682. }
  683.  
  684. int Scanner::ScanKeyword9(wchar_t *p1)
  685. {
  686.     if (p1[0] == U_i && p1[1] == U_n && p1[2] == U_t &&
  687.         p1[3] == U_e && p1[4] == U_r && p1[5] == U_f &&
  688.         p1[6] == U_a && p1[7] == U_c && p1[8] == U_e)
  689.         return TK_interface;
  690.     else if (p1[0] == U_p && p1[1] == U_r && p1[2] == U_o &&
  691.              p1[3] == U_t && p1[4] == U_e && p1[5] == U_c &&
  692.              p1[6] == U_t && p1[7] == U_e && p1[8] == U_d)
  693.         return TK_protected;
  694.     else if (p1[0] == U_t && p1[1] == U_r && p1[2] == U_a &&
  695.              p1[3] == U_n && p1[4] == U_s && p1[5] == U_i &&
  696.              p1[6] == U_e && p1[7] == U_n && p1[8] == U_t)
  697.         return TK_transient;
  698.  
  699.     return TK_Identifier;
  700. }
  701.  
  702. int Scanner::ScanKeyword10(wchar_t *p1)
  703. {
  704.     if (p1[0] == U_i && p1[1] == U_m && p1[2] == U_p &&
  705.         p1[3] == U_l && p1[4] == U_e && p1[5] == U_m &&
  706.         p1[6] == U_e && p1[7] == U_n && p1[8] == U_t && p1[9] == U_s)
  707.         return TK_implements;
  708.     else if (p1[0] == U_i && p1[1] == U_n && p1[2] == U_s &&
  709.              p1[3] == U_t && p1[4] == U_a && p1[5] == U_n &&
  710.              p1[6] == U_c && p1[7] == U_e && p1[8] == U_o && p1[9] == U_f)
  711.         return TK_instanceof;
  712.  
  713.     return TK_Identifier;
  714. }
  715.  
  716. int Scanner::ScanKeyword12(wchar_t *p1)
  717. {
  718.     if (p1[0] == U_s && p1[1] == U_y && p1[2] == U_n &&
  719.         p1[3] == U_c && p1[4] == U_h && p1[5] == U_r &&
  720.         p1[6] == U_o && p1[7] == U_n && p1[8] == U_i &&
  721.         p1[9] == U_z && p1[10] == U_e&& p1[11] == U_d)
  722.         return TK_synchronized;
  723.  
  724.     return TK_Identifier;
  725. }
  726.  
  727. /**********************************************************************/
  728. /*                           CHECK_OctalLiteral:                      */
  729. /**********************************************************************/
  730. /* Verify that an octal token is legal. If not, issue a message.      */
  731. /**********************************************************************/
  732. inline void Scanner::CheckOctalLiteral(wchar_t *cursor, wchar_t *tail)
  733. {
  734.     if (cursor[0] == U_0 && cursor[1] != U_x && cursor[1] != U_X)
  735.     {
  736.         wchar_t *p;
  737.         for (p = cursor + 1; p < tail; p++)
  738.         {
  739.             if (*p == U_8 || *p == U_9)
  740.                 break;
  741.         }
  742.  
  743.         if (p < tail)
  744.             lex -> bad_tokens.Next().Initialize(StreamError::BAD_OCTAL_CONSTANT,
  745.                                                 (unsigned) (cursor - lex -> InputBuffer()),
  746.                                                 (unsigned) (tail - lex -> InputBuffer()) - 1);
  747.     }
  748.  
  749.     return;
  750. }
  751.  
  752.  
  753. /**********************************************************************/
  754. /*                      ClassifyCharLiteral:                          */
  755. /**********************************************************************/
  756. /* This procedure is invoked to scan a character literal or a large   */
  757. /* character literal. A large character literal is preceded by the    */
  758. /* letter L (capital L). After the character literal has been scanned */
  759. /* and classified, it is entered in the table without its closing     */
  760. /* quote but with the opening quote (preceded by L if it's a large    */
  761. /* character literal).                                                */
  762. /**********************************************************************/
  763. void Scanner::ClassifyCharLiteral()
  764. {
  765.     current_token -> SetKind(TK_CharacterLiteral);
  766.  
  767.     wchar_t *ptr = cursor + 1;
  768.  
  769.     while (*ptr != U_SINGLE_QUOTE && (! Code::IsNewline(*ptr)))
  770.     {
  771.         if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
  772.         {                            // If it was a backslash,
  773.             if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
  774.                 ptr++;
  775.         }
  776.     }
  777.  
  778.     int len = ptr - cursor;
  779.     if (*ptr == U_SINGLE_QUOTE)
  780.     {
  781.         if (len == 1)
  782.             lex -> bad_tokens.Next().Initialize(StreamError::EMPTY_CHARACTER_CONSTANT,
  783.                                                 current_token -> Location(),
  784.                                                 (unsigned) (ptr - lex -> InputBuffer()));
  785.         ptr++;
  786.     }
  787.     else
  788.     {
  789.         if (len == 1) /* Definitely, an isolated quote */
  790.             current_token -> SetKind(0);
  791.         lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_CHARACTER_CONSTANT,
  792.                                             current_token -> Location(),
  793.                                             (unsigned) (ptr - lex -> InputBuffer()) - 1);
  794.     }
  795.  
  796.     current_token -> SetSymbol(control.char_table.FindOrInsertLiteral(cursor, ptr - cursor));
  797.  
  798.     cursor = ptr;
  799.     return;
  800. }
  801.  
  802.  
  803. /**********************************************************************/
  804. /*                     CLASSIFY_STRINGLITERAL:                        */
  805. /**********************************************************************/
/* This procedure is invoked when CURSOR points to the opening double */
/* quote of a string literal. The literal is scanned up to its        */
/* closing double quote or to the end of the line, whichever comes    */
/* first, and its text, including the opening quote and the closing   */
/* quote when one is present, is entered in the string literal        */
/* table. Unterminated literals are reported as bad tokens.           */
  812. /**********************************************************************/
  813. void Scanner::ClassifyStringLiteral()
  814. {
  815.     current_token -> SetKind(TK_StringLiteral);
  816.  
  817.     wchar_t *ptr = cursor + 1;
  818.  
  819.     while (*ptr != U_DOUBLE_QUOTE && (! Code::IsNewline(*ptr)))
  820.     {
  821.         if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
  822.         {                            // If it was a backslash,
  823.             if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
  824.                 ptr++;
  825.         }
  826.     }
  827.  
  828.     if (*ptr == U_DOUBLE_QUOTE)
  829.         ptr++;
  830.     else
  831.     {
  832.         if ((ptr - cursor) == 1) /* Definitely, an isolated double quote */
  833.             current_token -> SetKind(0);
  834.         lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_STRING_CONSTANT,
  835.                                             current_token -> Location(),
  836.                                             (unsigned) (ptr - lex -> InputBuffer()) - 1);
  837.     }
  838.  
  839.     current_token -> SetSymbol(control.string_table.FindOrInsertLiteral(cursor, ptr - cursor));
  840.  
  841.     cursor = ptr;
  842.     return;
  843. }
  844.  
  845.  
  846. /**********************************************************************/
  847. /*                     CLASSIFYIDORKEYWORD:                        */
  848. /**********************************************************************/
  849. /* This procedure is invoked when CURSOR points to one of the         */
  850. /* following characters:                                              */
  851. /*                                                                    */
  852. /*      'a'                                                           */
  853. /*      'b'                                                           */
  854. /*      'c'                                                           */
  855. /*      'd'                                                           */
  856. /*      'e'                                                           */
  857. /*      'f'                                                           */
  858. /*      'g'                                                           */
  859. /*      'i'                                                           */
  860. /*      'l'                                                           */
  861. /*      'n'                                                           */
  862. /*      'o'                                                           */
  863. /*      'p'                                                           */
  864. /*      'r'                                                           */
  865. /*      's'                                                           */
  866. /*      't'                                                           */
  867. /*      'v'                                                           */
  868. /*      'w'                                                           */
  869. /*                                                                    */
  870. /* It scans the identifier and checks whether or not it is a keyword. */
  871. /*                                                                    */
  872. /* NOTE that the use of that check is a time-optimization that is not */
  873. /* required for correctness.                                          */
  874. /**********************************************************************/
  875. void Scanner::ClassifyIdOrKeyword()
  876. {
  877.     wchar_t *ptr = cursor + 1;
  878.  
  879.     while (Code::IsAlnum(*ptr))
  880.         ptr++;
  881.     int len = ptr - cursor;
  882.  
  883.     current_token -> SetKind(len < 13 ? (scan_keyword[len])(cursor) : TK_Identifier);
  884.     if (current_token -> Kind() == TK_Identifier)
  885.     {
  886.         current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
  887.         for (int i = 0; i < control.option.keyword_map.Length(); i++)
  888.         {
  889.             if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
  890.                 current_token -> SetKind(control.option.keyword_map[i].key);
  891.         }
  892.     }
  893.     else if (current_token -> Kind() == TK_class || current_token -> Kind() == TK_interface)
  894.     {
  895.         //
  896.         // This type keyword is not nested. When we encounter an occurrence of the keyword
  897.         // class or interface that is not enclosed in at least one set of braces, we keep track
  898.         // of it by adding it to a list.
  899.         //
  900.         if (brace_stack.Size() == 0)
  901.             lex -> type_index.Next() = current_token_index;
  902.     }
  903.  
  904.     cursor = ptr;
  905.  
  906.     return;
  907. }
  908.  
  909. /**********************************************************************/
  910. /*                             CLASSIFY_ID:                           */
  911. /**********************************************************************/
  912. /* This procedure is invoked when CURSOR points to an alphabetic      */
  913. /* character other than the ones identified above or '$' or '_'.      */
  914. /* A token that starts with one of these letters is an identifier.    */
  915. /**********************************************************************/
  916. void Scanner::ClassifyId()
  917. {
  918.     wchar_t *ptr = cursor + 1;
  919.  
  920.     while (Code::IsAlnum(*ptr))
  921.         ptr++;
  922.  
  923.     int len = ptr - cursor;
  924.  
  925.     current_token -> SetKind(TK_Identifier);
  926.     current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
  927.  
  928.     for (int i = 0; i < control.option.keyword_map.Length(); i++)
  929.     {
  930.         if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
  931.             current_token -> SetKind(control.option.keyword_map[i].key);
  932.     }
  933.  
  934.     cursor = ptr;
  935.     return;
  936. }
  937.  
  938.  
  939. /**********************************************************************/
  940. /*                     CLASSIFY_NUMERICLITERAL:                       */
  941. /**********************************************************************/
  942. /* This procedure is invoked when CURSOR points directly to one of    */
  943. /* the characters below or to a '.' followed by one of the characters */
  944. /* below:                                                             */
  945. /*                                                                    */
  946. /*        case '0': case '1': case '2': case '3': case '4':           */
  947. /*        case '5': case '6': case '7': case '8': case '9':           */
  948. /*                                                                    */
  949. /* Such a token is classified as a numeric literal:                   */
  950. /*                                                                    */
  951. /*   TK_LongLiteral, TK_IntegerLiteral,                               */
  952. /*   TK_DOUBLELiteral, TK_FloatingPointLiteral                        */
  953. /**********************************************************************/
  954. void Scanner::ClassifyNumericLiteral()
  955. {
  956.     /******************************************************************/
  957.     /* Scan the initial sequence of digits if any.                    */
  958.     /******************************************************************/
  959.     wchar_t *ptr;
  960.     for (ptr = cursor; Code::IsDigit(*ptr); ptr++)
  961.         ;
  962.  
  963.     /******************************************************************/
  964.     /* We now take an initial crack at classifying the numeric token. */
  965.     /* we have four cases to consider.                                */
  966.     /*                                                                */
  967.     /* 1) If the initial (perhaps an empty) sequence of digits is     */
  968.     /*    followed by a period ('.'), we have a floating-constant.    */
  969.     /*    We scan the sequence of digits (if any) that follows the    */
  970.     /*    period.                                                     */
  971.     /*                                                                */
  972.     /* 2) Otherwise, we hava an integer literal.                      */
  973.     /*                                                                */
  974.     /*    If the initial (can't be empty) sequence of digits start    */
  975.     /*    with "0x" or "0X" we have a hexadecimal constant:           */
  976.     /*    continue scanning all hex-digits that follow the 'x'.       */
  977.     /******************************************************************/
  978.     if (*ptr == U_DOT)
  979.     {
  980.         current_token -> SetKind(TK_DoubleLiteral);
  981.         for (ptr++; Code::IsDigit(*ptr); ptr++)
  982.             ;
  983.     }
  984.     else
  985.     {
  986.         current_token -> SetKind(TK_IntegerLiteral);
  987.         if (*cursor == U_0 && (cursor[1] == U_x || cursor[1] == U_X))
  988.         {
  989.             ptr = cursor + 2;
  990.             if (isxdigit(*ptr))
  991.             {
  992.                 for (ptr++; isxdigit(*ptr); ptr++)
  993.                     ;
  994.             }
  995.             else lex -> bad_tokens.Next().Initialize(StreamError::INVALID_HEX_CONSTANT,
  996.                                                      current_token -> Location(),
  997.                                                      (unsigned) (ptr - lex -> InputBuffer()) - 1);
  998.         }
  999.     }
  1000.  
  1001.     /******************************************************************/
  1002.     /* If the initial numeric token is followed by an exponent, then  */
  1003.     /* it is a floating-constant. If that's the case, the literal is  */
  1004.     /* reclassified ant the exponent is scanned.                      */
  1005.     /*                                                                */
  1006.     /* NOTE that as 'E' and 'e' are legitimate hexadecimal digits, we */
  1007.     /* don't have to worry about a hexadecimal constant being used as */
  1008.     /* the prefix of a floating-constant. E.g., 0x123e12 is tokenized */
  1009.     /* as a single hexadecimal digit. The string 0x123e+12 gets       */
  1010.     /* broken down as the hex number 0x123e, the operator '+' and the */
  1011.     /* decimal constant 12.                                           */
  1012.     /******************************************************************/
  1013.     if (*ptr == U_e || *ptr == U_E)
  1014.     {
  1015.         current_token -> SetKind(TK_DoubleLiteral);
  1016.  
  1017.         ptr++; /* Skip the 'e' or 'E' */
  1018.  
  1019.         if (*ptr == U_PLUS || *ptr == U_MINUS)
  1020.             ptr++; /* Skip the '+' or '-' */
  1021.  
  1022.         if (Code::IsDigit(*ptr))
  1023.         {
  1024.             for (ptr++; Code::IsDigit(*ptr); ptr++)
  1025.                 ;
  1026.         }
  1027.         else lex -> bad_tokens.Next().Initialize(StreamError::INVALID_FLOATING_CONSTANT_EXPONENT,
  1028.                                                  current_token -> Location(),
  1029.                                                  (unsigned) (ptr - lex -> InputBuffer()) - 1);
  1030.     }
  1031.  
  1032.     /******************************************************************/
  1033.     /* A numeric constant may be suffixed by a letter that further    */
  1034.     /* qualifies what kind of a constant it is. We check for these    */
  1035.     /* suffixes here.                                                 */
  1036.     /******************************************************************/
  1037.     int len;
  1038.  
  1039.     if (*ptr == U_f || *ptr == U_F)
  1040.     {
  1041.         ptr++;
  1042.         len = ptr - cursor;
  1043.         current_token -> SetSymbol(control.float_table.FindOrInsertLiteral(cursor, len));
  1044.         current_token -> SetKind(TK_FloatingPointLiteral);
  1045.     }
  1046.     else if (*ptr == U_d || *ptr == U_D)
  1047.     {
  1048.         ptr++;
  1049.         len = ptr - cursor;
  1050.         current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
  1051.         current_token -> SetKind(TK_DoubleLiteral);
  1052.     }
  1053.     else if (current_token -> Kind() == TK_IntegerLiteral)
  1054.     {
  1055.         if (*ptr == U_l || *ptr == U_L)
  1056.         {
  1057.             ptr++; /* Skip the 'l' or 'L' */
  1058.             len = ptr - cursor;
  1059.             current_token -> SetSymbol(control.long_table.FindOrInsertLiteral(cursor, len));
  1060.             current_token -> SetKind(TK_LongLiteral);
  1061.         }
  1062.         else
  1063.         {
  1064.             len = ptr - cursor;
  1065.             current_token -> SetSymbol(control.int_table.FindOrInsertLiteral(cursor, len));
  1066.         }
  1067.  
  1068.         CheckOctalLiteral(cursor, ptr);
  1069.     }
  1070.     else
  1071.     {
  1072.         len = ptr - cursor;
  1073.         current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
  1074.         current_token -> SetKind(TK_DoubleLiteral);
  1075.     }
  1076.  
  1077.     /******************************************************************/
  1078.     /* We now have scanned the complete token and it has been properly*/
  1079.     /* classified. CURSOR points to its first character in the buffer */
  1080.     /* and PTR points to the character immediately following it. We   */
  1081.     /* insert the name into the name table and if the token is an     */
  1082.     /* octal constant, we check that all the digits in its name are   */
  1083.     /* in the range 0-7.                                              */
  1084.     /******************************************************************/
  1085.  
  1086.     cursor = ptr;
  1087.     return;
  1088. }
  1089.  
  1090.  
  1091. /**********************************************************************/
  1092. /*                         CLASSIFY_COLON:                            */
  1093. /**********************************************************************/
  1094. void Scanner::ClassifyColon()
  1095. {
  1096.     current_token -> SetKind(TK_COLON);
  1097.  
  1098.     cursor++;
  1099.  
  1100.     return;
  1101. }
  1102.  
  1103.  
  1104. /**********************************************************************/
  1105. /*                          CLASSIFY_PLUS:                            */
  1106. /**********************************************************************/
  1107. void Scanner::ClassifyPlus()
  1108. {
  1109.     cursor++;
  1110.  
  1111.     if (*cursor == U_PLUS)
  1112.     {
  1113.         cursor++;
  1114.         current_token -> SetKind(TK_PLUS_PLUS);
  1115.     }
  1116.     else if (*cursor == U_EQUAL)
  1117.     {
  1118.         cursor++;
  1119.         current_token -> SetKind(TK_PLUS_EQUAL);
  1120.     }
  1121.     else current_token -> SetKind(TK_PLUS);
  1122.  
  1123.     return;
  1124. }
  1125.  
  1126.  
  1127. /**********************************************************************/
  1128. /*                         CLASSIFY_MINUS:                            */
  1129. /**********************************************************************/
  1130. void Scanner::ClassifyMinus()
  1131. {
  1132.     cursor++;
  1133.  
  1134.     if (*cursor == U_MINUS)
  1135.     {
  1136.         cursor++;
  1137.         current_token -> SetKind(TK_MINUS_MINUS);
  1138.     }
  1139.     else if (*cursor == U_EQUAL)
  1140.     {
  1141.         cursor++;
  1142.         current_token -> SetKind(TK_MINUS_EQUAL);
  1143.     }
  1144.     else current_token -> SetKind(TK_MINUS);
  1145.  
  1146.     return;
  1147. }
  1148.  
  1149.  
  1150. /**********************************************************************/
  1151. /*                          CLASSIFY_STAR:                            */
  1152. /**********************************************************************/
  1153. void Scanner::ClassifyStar()
  1154. {
  1155.     cursor++;
  1156.  
  1157.     if (*cursor == U_EQUAL)
  1158.     {
  1159.         cursor++;
  1160.         current_token -> SetKind(TK_MULTIPLY_EQUAL);
  1161.     }
  1162.     else current_token -> SetKind(TK_MULTIPLY);
  1163.  
  1164.     return;
  1165. }
  1166.  
  1167.  
  1168. /**********************************************************************/
  1169. /*                         CLASSIFY_SLASH:                            */
  1170. /**********************************************************************/
  1171. void Scanner::ClassifySlash()
  1172. {
  1173.     cursor++;
  1174.  
  1175.     if (*cursor == U_EQUAL)
  1176.     {
  1177.         cursor++;
  1178.         current_token -> SetKind(TK_DIVIDE_EQUAL);
  1179.     }
  1180.     else current_token -> SetKind(TK_DIVIDE);
  1181.  
  1182.     return;
  1183. }
  1184.  
  1185.  
  1186. /**********************************************************************/
  1187. /*                         CLASSIFY_LESS:                             */
  1188. /**********************************************************************/
  1189. void Scanner::ClassifyLess()
  1190. {
  1191.     cursor++;
  1192.  
  1193.     if (*cursor == U_EQUAL)
  1194.     {
  1195.         cursor++;
  1196.         current_token -> SetKind(TK_LESS_EQUAL);
  1197.     }
  1198.     else if (*cursor == U_LESS)
  1199.     {
  1200.         cursor++;
  1201.  
  1202.         if (*cursor == U_EQUAL)
  1203.         {
  1204.             cursor++;
  1205.             current_token -> SetKind(TK_LEFT_SHIFT_EQUAL);
  1206.         }
  1207.         else current_token -> SetKind(TK_LEFT_SHIFT);
  1208.     }
  1209.     else current_token -> SetKind(TK_LESS);
  1210.  
  1211.     return;
  1212. }
  1213.  
  1214.  
  1215. /**********************************************************************/
  1216. /*                        CLASSIFY_GREATER:                           */
  1217. /**********************************************************************/
  1218. void Scanner::ClassifyGreater()
  1219. {
  1220.     cursor++;
  1221.  
  1222.     if (*cursor == U_EQUAL)
  1223.     {
  1224.         cursor++;
  1225.         current_token -> SetKind(TK_GREATER_EQUAL);
  1226.     }
  1227.     else if (*cursor == U_GREATER)
  1228.     {
  1229.         cursor++;
  1230.  
  1231.         if (*cursor == U_EQUAL)
  1232.         {
  1233.             cursor++;
  1234.             current_token -> SetKind(TK_RIGHT_SHIFT_EQUAL);
  1235.         }
  1236.         else if (*cursor == U_GREATER)
  1237.         {
  1238.             cursor++;
  1239.  
  1240.             if (*cursor == U_EQUAL)
  1241.             {
  1242.                 cursor++;
  1243.                 current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT_EQUAL);
  1244.             }
  1245.             else current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT);
  1246.         }
  1247.         else current_token -> SetKind(TK_RIGHT_SHIFT);
  1248.     }
  1249.     else current_token -> SetKind(TK_GREATER);
  1250.  
  1251.     return;
  1252. }
  1253.  
  1254.  
  1255. /**********************************************************************/
  1256. /*                          CLASSIFY_AND:                             */
  1257. /**********************************************************************/
  1258. void Scanner::ClassifyAnd()
  1259. {
  1260.     cursor++;
  1261.  
  1262.     if (*cursor == U_AMPERSAND)
  1263.     {
  1264.         cursor++;
  1265.         current_token -> SetKind(TK_AND_AND);
  1266.     }
  1267.     else if (*cursor == U_EQUAL)
  1268.     {
  1269.         cursor++;
  1270.         current_token -> SetKind(TK_AND_EQUAL);
  1271.     }
  1272.     else current_token -> SetKind(TK_AND);
  1273.  
  1274.     return;
  1275. }
  1276.  
  1277.  
  1278. /**********************************************************************/
  1279. /*                          CLASSIFY_OR:                              */
  1280. /**********************************************************************/
  1281. void Scanner::ClassifyOr()
  1282. {
  1283.     cursor++;
  1284.  
  1285.     if (*cursor == U_BAR)
  1286.     {
  1287.         cursor++;
  1288.         current_token -> SetKind(TK_OR_OR);
  1289.     }
  1290.     else if (*cursor == U_EQUAL)
  1291.     {
  1292.         cursor++;
  1293.         current_token -> SetKind(TK_OR_EQUAL);
  1294.     }
  1295.     else current_token -> SetKind(TK_OR);
  1296.  
  1297.     return;
  1298. }
  1299.  
  1300.  
  1301. /**********************************************************************/
  1302. /*                          CLASSIFY_XOR:                             */
  1303. /**********************************************************************/
  1304. void Scanner::ClassifyXor()
  1305. {
  1306.     cursor++;
  1307.  
  1308.     if (*cursor == U_EQUAL)
  1309.     {
  1310.         cursor++;
  1311.         current_token -> SetKind(TK_XOR_EQUAL);
  1312.     }
  1313.     else current_token -> SetKind(TK_XOR);
  1314.  
  1315.     return;
  1316. }
  1317.  
  1318.  
  1319. /**********************************************************************/
  1320. /*                          CLASSIFY_NOT:                             */
  1321. /**********************************************************************/
  1322. void Scanner::ClassifyNot()
  1323. {
  1324.     cursor++;
  1325.  
  1326.     if (*cursor == U_EQUAL)
  1327.     {
  1328.         cursor++;
  1329.         current_token -> SetKind(TK_NOT_EQUAL);
  1330.     }
  1331.     else current_token -> SetKind(TK_NOT);
  1332.  
  1333.     return;
  1334. }
  1335.  
  1336.  
  1337. /**********************************************************************/
  1338. /*                         CLASSIFY_EQUAL:                            */
  1339. /**********************************************************************/
  1340. void Scanner::ClassifyEqual()
  1341. {
  1342.     cursor++;
  1343.  
  1344.     if (*cursor == U_EQUAL)
  1345.     {
  1346.         cursor++;
  1347.         current_token -> SetKind(TK_EQUAL_EQUAL);
  1348.     }
  1349.     else current_token -> SetKind(TK_EQUAL);
  1350.  
  1351.     return;
  1352. }
  1353.  
  1354.  
  1355. /**********************************************************************/
  1356. /*                          CLASSIFY_MOD:                             */
  1357. /**********************************************************************/
  1358. void Scanner::ClassifyMod()
  1359. {
  1360.     cursor++;
  1361.  
  1362.     if (*cursor == U_EQUAL)
  1363.     {
  1364.         cursor++;
  1365.         current_token -> SetKind(TK_REMAINDER_EQUAL);
  1366.     }
  1367.     else current_token -> SetKind(TK_REMAINDER);
  1368.  
  1369.     return;
  1370. }
  1371.  
  1372.  
  1373. /**********************************************************************/
  1374. /*                         CLASSIFY_PERIOD:                           */
  1375. /**********************************************************************/
  1376. void Scanner::ClassifyPeriod()
  1377. {
  1378.     if (Code::IsDigit(cursor[1])) // Is period immediately followed by digit?
  1379.         ClassifyNumericLiteral();
  1380.     else
  1381.     {
  1382.         current_token -> SetKind(TK_DOT);
  1383.  
  1384.         cursor++;
  1385.     }
  1386.  
  1387.     return;
  1388. }
  1389.  
  1390.  
  1391. /**********************************************************************/
  1392. /*                         CLASSIFY_SEMICOLON:                        */
  1393. /**********************************************************************/
  1394. void Scanner::ClassifySemicolon()
  1395. {
  1396.     current_token -> SetKind(TK_SEMICOLON);
  1397.  
  1398.     cursor++;
  1399.  
  1400.     return;
  1401. }
  1402.  
  1403.  
  1404. /**********************************************************************/
  1405. /*                           CLASSIFY_COMMA:                          */
  1406. /**********************************************************************/
  1407. void Scanner::ClassifyComma()
  1408. {
  1409.     current_token -> SetKind(TK_COMMA);
  1410.  
  1411.     cursor++;
  1412.  
  1413.     return;
  1414. }
  1415.  
  1416.  
  1417. /**********************************************************************/
  1418. /*                           CLASSIFY_LBRACE:                         */
  1419. /**********************************************************************/
  1420. void Scanner::ClassifyLbrace()
  1421. {
  1422.     //
  1423.     // Instead of setting the symbol for a left brace, we keep track of it.
  1424.     // When we encounter its matching right brace, we use the symbol field
  1425.     // to identify its counterpart.
  1426.     //
  1427.     brace_stack.Push(current_token_index);
  1428.  
  1429.     current_token -> SetKind(TK_LBRACE);
  1430.  
  1431.     cursor++;
  1432.  
  1433.     return;
  1434. }
  1435.  
  1436.  
  1437. /**********************************************************************/
  1438. /*                           CLASSIFY_RBRACE:                         */
  1439. /**********************************************************************/
  1440. void Scanner::ClassifyRbrace()
  1441. {
  1442.     //
  1443.     // When a left brace in encountered, it is pushed into the brace_stack.
  1444.     // When its matching right brace in encountered, we pop the left brace
  1445.     // and make it point to its matching right brace.
  1446.     //
  1447.     LexStream::TokenIndex left_brace = brace_stack.Top();
  1448.     if (left_brace) // This right brace is matched by a left one
  1449.     {
  1450.         lex -> token_stream[left_brace].SetRightBrace(current_token_index);
  1451.         brace_stack.Pop();
  1452.     }
  1453.  
  1454.     current_token -> SetKind(TK_RBRACE);
  1455.  
  1456.     cursor++;
  1457.  
  1458.     return;
  1459. }
  1460.  
  1461.  
  1462. /**********************************************************************/
  1463. /*                           CLASSIFY_LPAREN:                         */
  1464. /**********************************************************************/
  1465. void Scanner::ClassifyLparen()
  1466. {
  1467.     current_token -> SetKind(TK_LPAREN);
  1468.  
  1469.     cursor++;
  1470.  
  1471.     return;
  1472. }
  1473.  
  1474.  
  1475. /**********************************************************************/
  1476. /*                           CLASSIFY_RPAREN:                         */
  1477. /**********************************************************************/
  1478. void Scanner::ClassifyRparen()
  1479. {
  1480.     current_token -> SetKind(TK_RPAREN);
  1481.  
  1482.     cursor++;
  1483.  
  1484.     return;
  1485. }
  1486.  
  1487.  
  1488. /**********************************************************************/
  1489. /*                          CLASSIFY_LBRACKET:                        */
  1490. /**********************************************************************/
  1491. void Scanner::ClassifyLbracket()
  1492. {
  1493.     current_token -> SetKind(TK_LBRACKET);
  1494.  
  1495.     cursor++;
  1496.  
  1497.     return;
  1498. }
  1499.  
  1500.  
  1501. /**********************************************************************/
  1502. /*                          CLASSIFY_RBRACKET:                        */
  1503. /**********************************************************************/
  1504. void Scanner::ClassifyRbracket()
  1505. {
  1506.     current_token -> SetKind(TK_RBRACKET);
  1507.  
  1508.     cursor++;
  1509.  
  1510.     return;
  1511. }
  1512.  
  1513.  
  1514. /**********************************************************************/
  1515. /*                         CLASSIFY_COMPLEMENT:                       */
  1516. /**********************************************************************/
  1517. void Scanner::ClassifyComplement()
  1518. {
  1519.     current_token -> SetKind(TK_TWIDDLE);
  1520.  
  1521.     cursor++;
  1522.  
  1523.     return;
  1524. }
  1525.  
  1526.  
  1527. /**********************************************************************/
  1528. /*                        CLASSIFY_BAD_TOKEN:                         */
  1529. /**********************************************************************/
  1530. void Scanner::ClassifyBadToken()
  1531. {
  1532.     if (++cursor < &lex -> InputBuffer()[lex -> InputBufferLength()]) // not the terminating character?
  1533.     {
  1534.          current_token -> SetKind(0);
  1535.          current_token -> SetSymbol(control.FindOrInsertName(cursor - 1, 1));
  1536.  
  1537.          lex -> bad_tokens.Next().Initialize(StreamError::BAD_TOKEN,
  1538.                                              current_token -> Location(),
  1539.                                              current_token -> Location());
  1540.     }
  1541.     else
  1542.     {
  1543.         current_token -> SetKind(TK_EOF);
  1544.     }
  1545.  
  1546.     return;
  1547. }
  1548.  
  1549.  
  1550. /**********************************************************************/
  1551. /*                        CLASSIFY_QUESTION:                          */
  1552. /**********************************************************************/
  1553. /**********************************************************************/
  1554. void Scanner::ClassifyQuestion()
  1555. {
  1556.     current_token -> SetKind(TK_QUESTION);
  1557.  
  1558.     cursor++;
  1559.  
  1560.     return;
  1561. }
  1562.  
  1563.  
  1564. /**********************************************************************/
  1565. /*                     CLASSIFY_NONASCIIUNICODE:                      */
  1566. /**********************************************************************/
  1567. void Scanner::ClassifyNonAsciiUnicode()
  1568. {
  1569.     if (Code::IsAlpha(*cursor)) // Some kind of non-ascii unicode letter
  1570.         ClassifyId();
  1571.     else 
  1572.         ClassifyBadToken();
  1573.     return;
  1574. }
  1575.  
  1576.  
  1577.